import scrapy

from topicdetail.items import TopicdetailItem


class CcubaiSpider(scrapy.Spider):
    """Crawl ccubai.com topic listing pages and emit one item per image page.

    ``parse`` handles the listing pages in ``start_urls`` and schedules a
    request for every linked detail page. ``parse2`` handles a detail page:
    it yields a ``TopicdetailItem`` whose ``image_url`` field holds the image
    address, then follows the "nextpage" link until the pager reports the
    last page or no further link exists.
    """

    name = "ccubai"
    allowed_domains = ["ccubai.com", "img7.igusoft.com", "pic-ture.tujidao.vip"]
    # NOTE: every entry needs its own trailing comma; adjacent string literals
    # without one are silently concatenated into a single (invalid) URL.
    start_urls = [
        "https://www.ccubai.com/topicdetail-1.html",
        "https://www.ccubai.com/topicdetail-2.html",
        "https://www.ccubai.com/topicdetail-3.html",
        "https://www.ccubai.com/topicdetail-4.html",
        "https://www.ccubai.com/topicdetail-5.html",
    ]
    # Base URL that replaces everything up to and including "picuploads"
    # in the scraped image src.
    redirects_url = "https://pic-ture.tujidao.vip"

    def parse(self, response, **kwargs):
        """Yield a request for each detail page linked from a listing page.

        Args:
            response: Scrapy response for one of the ``start_urls`` pages.
            **kwargs: Accepted for compatibility with Scrapy's callback
                signature; unused.

        Yields:
            scrapy.Request: One request per entry that carries a link, with
            ``parse2`` as the callback.
        """
        for entry in response.xpath('/html/body/main/div[2]/div/div[*]'):
            href = entry.xpath('./div/a/@href').extract_first()
            if not href:
                # Entries without a link would otherwise resolve to the
                # listing page itself and be re-fetched as a detail page.
                continue
            # Resolve relative links against the current page URL.
            yield scrapy.Request(
                url=response.urljoin(href),
                callback=self.parse2,
            )

    def parse2(self, response, **kwargs):
        """Extract the image of a detail page and follow its pagination.

        Args:
            response: Scrapy response for a detail page.
            **kwargs: Accepted for compatibility with Scrapy's callback
                signature; unused.

        Yields:
            TopicdetailItem: Item with ``image_url`` set, when the page
            contains an image.
            scrapy.Request: Request for the next page, unless the pager shows
            that this is the last page or there is no "nextpage" link.
        """
        src = response.xpath('//*[@id="showimg"]/img/@src').extract_first()
        if src:
            # Keep only the part after the last "picuploads" and re-root it on
            # ``redirects_url``. If the marker is missing, fall back to the
            # absolute form of the original src instead of gluing two URLs
            # together.
            _, marker, tail = src.rpartition("picuploads")
            item = TopicdetailItem()
            item['image_url'] = (
                self.redirects_url + tail if marker else response.urljoin(src)
            )
            yield item
        else:
            self.logger.warning("No image found on %s", response.url)

        # ``text()`` selects the span's text node; ``@text`` would look for an
        # attribute called "text" and never match. The counter is presumably
        # formatted as "current/total" -- TODO confirm against the live page.
        counter = response.xpath('//*[@id="page"]/span/text()').extract_first()
        if counter:
            parts = [part.strip() for part in counter.split("/")]
            if len(parts) >= 2 and parts[0] and parts[0] == parts[1]:
                # Current page equals total page count: nothing left to crawl.
                return

        next_href = response.xpath(
            '//*[@id="page"]/a[@class="nextpage"]/@href'
        ).extract_first()
        if next_href:
            yield scrapy.Request(
                url=response.urljoin(next_href),
                callback=self.parse2,
            )
